In [ ]:
    
library(forecast)
library(tseries)
    
In [ ]:
    
loadData <- function(dataFolder) {
    # read every CSV in dataFolder into a monthly ts object, keyed by complaint type
    files <- list.files(dataFolder)
    data <- list()
    for(file in files) {
        df <- read.csv(paste0(dataFolder, "/", file), stringsAsFactors=FALSE)
        minYear <- min(df$Year)
        complaintType <- substr(file, 1, nchar(file) - 4)  # strip the ".csv" extension
        tsObject <- ts(df$Complaints, start=c(minYear, 1), frequency = 12)
        data[[complaintType]] <- tsObject
    }
    data
}
data <- loadData("../../data/topNComplaints")
# change dates if data changes
train_stop <- c(2015, 6)
test_start <- c(2015, 7)
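# quick sanity check (added): which complaint types were loaded, and what span does
# the series we are about to model cover?
names(data)
start(data[["Dog menace "]])
end(data[["Dog menace "]])
frequency(data[["Dog menace "]])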
    
In [ ]:
    
series <- data[["Dog menace "]]
series
    
In [ ]:
    
# observations before April 2012 are too sparse to use
train_start <- c(2012, 4)
series <- window(series, start=train_start, end=c(2016, 9))
series
    
In [ ]:
    
tsdisplay(series)
    
In [ ]:
    
plot(series, col="red", lty=2)
lines(tsclean(series), lty=1)
legend("topright", col=c("red", "black"), lty=c(2,1), legend=c("Original", "Cleaned"))
    
In [ ]:
    
series.cleaned <- tsclean(series)
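# quick check (added): how many points did tsclean() actually alter, and by how much?
sum(series.cleaned != series)
max(abs(series.cleaned - series))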
    
There are no significant outliers, and the data is clean.
In [ ]:
    
# Compare the seasonality of the full data set with that of the training period alone
plot(stl(series, s.window="periodic"))
# The series does look like it has a seasonal component - let's take a look at that.
plot(stl(window(series, end=c(2015, 6)), s.window="periodic"))
# The same decomposition, restricted to the training period
    
In [ ]:
    
plot(stl(series, s.window=6)) # change s.window to something that makes sense
    
In [ ]:
    
# let's take a look at which months this series peaks in
# looks like there are two spikes, one in March and the other in September
# the data also contains another spike between the two, which isn't apparent in the
# seasonal component
seasonal <- stl(series, s.window=6)$time.series[, 1] # change s.window
plot(seasonal, col="grey")
month1 <- 9 # september
month2 <- 3 # march
for(i in 2012:2016) {    
    abline(v=(month1-1)/12 + i, lty=2)
    abline(v=(month2-1)/12 + i, lty=3)
}
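
# cross-check (added): average seasonal effect by calendar month;
# do the two largest values correspond to March and September (months 3 and 9)?
monthly.effect <- tapply(seasonal, cycle(seasonal), mean)
sort(monthly.effect, decreasing = TRUE)[1:2]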
    
In [ ]:
    
# let's superimpose this on the original data, to see if this effect 
# is significant
plot(series, col="grey")
month1 <- 9
month2 <- 3
for(i in 2012:2016) {    
    abline(v=(month1-1)/12 + i, lty=2)
    abline(v=(month2-1)/12 + i, lty=3)
}
    
In [ ]:
    
# this decomposition seems to fit the data well, and the seasonal component does seem
# to increase as time progresses - let's set s.window = "periodic"
stl.fit <- stl(series, s.window="periodic")
series.adj <- seasadj(stl.fit)
tsdisplay(series.adj)
    
In [ ]:
    
stl.cleaned.fit <- stl(series.cleaned, s.window=6)
series.cleaned.adj <- seasadj(stl.cleaned.fit)
tsdisplay(series.cleaned.adj)
    
In [ ]:
    
Acf(series.adj)
    
In [ ]:
    
# the series above is a classic example of one that needs a first difference (d = 1),
# so let's try that and check the ACF to see whether it is overdifferenced
tsdisplay(diff(series.adj, lag=1, differences = 1))
    
In [ ]:
    
# looks like the series has a strong, negative ACF at lag 12,
# indicating possible residual seasonality
# let's also look at d = 2
tsdisplay(diff(series.adj, lag = 1, differences = 2))
# this is clearly overdifferenced, so d <= 1
    
In [ ]:
    
# take a look at standard-deviation
sd.0 <- sd(series.adj)
sd.1 <- sd(diff(series.adj, differences = 1))
sd.2 <- sd(diff(series.adj, differences = 2))
print(paste0("SD with d = 0: ", sd.0, ", SD with d = 1: ", sd.1, ", SD with d = 2: ", sd.2))
# in terms of sd, d=1 is a better fit
    
In [ ]:
    
series.diff <- diff(series.adj, lag=1, differences = 1)
    
In [ ]:
    
plot(series.diff, col="grey")
# smooth with a 2x4 MA (a 2-MA applied to a 4-MA) to visualise any remaining trend
lines(ma(ma(series.diff, order=4), order=2))
abline(h=mean(series.diff), col="blue", lty=2)
    
In [ ]:
    
ndiffs(series.adj)
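# (added) ndiffs defaults to a KPSS test; compare with other unit-root tests
ndiffs(series.adj, test = "adf")
ndiffs(series.adj, test = "pp")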
    
Next, we need to estimate p and q. To do this, we take a look at the PACF of the data. Note that this analysis is done on the differenced data; if we decide to fit a model with d = 0, we need to repeat the analysis on the un-differenced data as well.
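As an added cross-check (not part of the original analysis), we can also let auto.arima propose non-seasonal orders for the seasonally adjusted series and compare them with the orders chosen by eye below.
In [ ]:
    
# cross-check (added): automated order selection on the seasonally adjusted series
auto.arima(series.adj, seasonal = FALSE)
    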
In [ ]:
    
# consider d = 0 first
# looks like an AR(1), MA(4)
Pacf(series.adj)
    
In [ ]:
    
# now let's try d = 1
# looks like an MA(12) process, possibly with an AR(3) component
Pacf(series.diff)
    
In [ ]:
    
modelArima <- function(series, order, h, testData = NULL, lambda=NULL) {
    fit <- Arima(series, order=order, lambda = lambda)
    print(summary(fit))
    predictions <- forecast(fit, h, lambda = lambda)
    # compute the y-axis range; fall back to the forecast means when there is no test data
    if(!is.null(testData)) {
        min.yvalue <- min(min(series), min(testData))
        max.yvalue <- max(max(series), max(testData))
    } else {
        min.yvalue <- min(min(series), min(predictions$mean))
        max.yvalue <- max(max(series), max(predictions$mean))
    }
    plot(predictions, ylim=c(min.yvalue, max.yvalue))
    if(!is.null(testData)) {
        lines(testData, col="red", lty=2)
        print(accuracy(predictions, testData))
    }
    # check whether the residuals look like white noise
    Acf(residuals(fit), main="Residuals")
    # portmanteau (Ljung-Box) test; fitdf = number of estimated ARMA coefficients (p + q)
    print(Box.test(residuals(fit), lag=24, fitdf=order[1] + order[3], type="Ljung"))
}
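
# note (added): fitdf should equal the number of estimated ARMA coefficients;
# e.g. for ARIMA(1,0,4), fitdf = 1 + 4 = 5, so the Ljung-Box statistic is compared
# against a chi-squared distribution with 24 - 5 = 19 degrees of freedom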
    
In [ ]:
    
# split the series into a test and a train set
series.train <- window(series.adj, end=train_stop)
series.test <- window(series.adj, start=test_start)
    
In [ ]:
    
modelArima(series.train, c(1, 0, 4), length(series.test), series.test)
    
In [ ]:
    
modelArima(series.train, c(3, 1, 12), length(series.test), series.test)
    
In [ ]:
    
adf.test(series)
    
In [ ]:
    
adf.test(series.adj)
    
In [ ]:
    
adf.test(series.diff)
    
In [ ]:
    
# let's try a BoxCox transform
est.lambda <- BoxCox.lambda(series.diff)
est.lambda
    
In [ ]:
    
# ADF test on the Box-Cox-transformed differenced series
adf.test(BoxCox(series.diff, lambda = est.lambda))
    
In [ ]:
    
# display the Box-Cox-transformed differenced series
tsdisplay(BoxCox(series.diff, lambda = est.lambda))
    
In [ ]:
    
# let's retry the models with a Box-Cox transform
# for the d = 0 model, estimate lambda from the un-differenced (seasonally adjusted) series
modelArima(series.train, c(1, 0, 4), length(series.test), series.test, lambda = BoxCox.lambda(series.adj))
    
In [ ]:
    
modelArima(series.train, c(3, 1, 12), length(series.test), series.test, lambda = est.lambda)
    
In [ ]:
    
# series = original data
# series.cleaned = outliers removed
# series.adj = original data, seasonally adjusted
# series.cleaned.adj = cleaned data, seasonally adjusted
# series.train = train split of the seasonally adjusted data
# series.test = test split of the seasonally adjusted data
# train_data_* / test_data_* = train/test splits of the other data variants (created below)
# stl.fit = STL decomposition of the original data
# stl.cleaned.fit = STL decomposition of the cleaned data
train_start <- c(2012, 4)
train_end <- c(2015, 6)
test_start <- c(2015, 7)
test_end <- c(2016, 6)
seasonal <- stl.fit$time.series[, "seasonal"]
seasonal_cleaned <- stl.cleaned.fit$time.series[, "seasonal"]
    
In [ ]:
    
## Compute per-period (monthly) statistics of a seasonal component and repeat them
## into a ts covering the requested number of years
period_stat <- function(ts_data_in, type = 1, start_value, years){
    # type 1: per-month sum
    # type 2: per-month mean
    freq <- frequency(ts_data_in)
    start_val <- start(ts_data_in)
    # pad the front with NAs so that each block of `freq` values lines up with a calendar year
    ts_data_in <- c(rep(NA, start_val[2] - 1), ts_data_in)
    sum_vec <- numeric(freq)   # running sum for each month
    freq_sum <- numeric(freq)  # number of non-missing observations for each month
    # use the padded length so the last (partial) year is not dropped
    max_limit <- ceiling(length(ts_data_in)/freq)
    for(i in 1:max_limit){
        vec <- ts_data_in[(((i-1)*freq)+1):(((i-1)*freq)+freq)]
        freq_vector <- as.numeric(!is.na(vec))
        vec[is.na(vec)] <- 0
        sum_vec <- sum_vec + vec
        freq_sum <- freq_sum + freq_vector
    }
    if(type == 1){
        final_ts <- sum_vec
    } else if(type == 2) {
        final_ts <- sum_vec/freq_sum
    } else {
        stop("Invalid type")
    }
    return(ts(rep(final_ts, years), frequency = freq, start = start_value))
}
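
# equivalence check (added): for type = 2 the per-month means should match a direct tapply
all.equal(as.numeric(period_stat(seasonal, 2, c(2012, 1), years = 1)),
          as.numeric(tapply(seasonal, cycle(seasonal), mean)))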
    
In [ ]:
    
# Shift the seasonally adjusted series so they are strictly positive
# (multiplicative ETS models require positive data); the shift is removed again later
min_ts_value <- min(series.adj)
min_ts_cleaned_value <- min(series.cleaned.adj)
bias_value <- (-1*min_ts_value) + 1
bias_value_cleaned <- (-1*min_ts_cleaned_value) + 1
ES_series <- series.adj + bias_value
ES_series_cleaned <- series.cleaned.adj + bias_value_cleaned
train_data_adj <- window(ES_series,start = train_start, end=train_end)
test_data_adj <- window(ES_series, start= test_start, end = test_end)
train_data_adj_cleaned <- window(ES_series_cleaned,start = train_start, end = train_end)
test_data_adj_cleaned <- window(ES_series_cleaned, start = test_start, end = test_end)
train_data <- window(series, start = train_start, end = train_end)
test_data <- window(series, start = test_start, end = test_end)
train_data_cleaned <- window(series.cleaned, start = train_start, end = train_end)
test_data_cleaned <- window(series.cleaned, start = test_start, end = test_end)
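
# quick check (added): the shifted series should now be strictly positive,
# as required by the multiplicative ETS variants
min(ES_series); min(ES_series_cleaned)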
    
In [ ]:
    
# Compute the mean seasonal component from the entire data set, not just the training split.
# TODO: revisit this choice based on input from Suchana.
seasonal_mean <- period_stat(seasonal,2,c(2012,1),years = 7)
seasonal_cleaned_mean <- period_stat(seasonal_cleaned,2,c(2012,1),years = 7)
    
In [ ]:
    
# Preprocessing: replace zeros with a small positive value so that multiplicative models can be fitted
train_data_adj[train_data_adj==0]=0.01 
train_data_adj_cleaned[train_data_adj_cleaned==0]=0.01
    
In [ ]:
    
all_types <- c("ANN","AAN","AAA","ANA","MNN","MAN","MNA","MAA","MMN","MNM","MMM","MAM")
forecast_values <- 12
# ETS codes are (error, trend, seasonality): e.g. AAA = additive error, additive trend, additive seasonality
# ANN = additive error, no trend, no seasonality
    
In [ ]:
    
fit_function <- function(train_data, test_data)
{
    all_fit <- list()
    test_models <- list()
    print("Fitting various models: ")
    for (damped in c(TRUE, FALSE)) {
        for (model_type in all_types) {
            # a damped trend only makes sense when the model actually has a trend component
            if(damped && substr(model_type, 2, 2) == "N") {
                next
            }
            test_model <- ets(train_data, model = model_type, damped = damped)
            model_name <- paste0("ETS Model: ", model_type, ", Damped: ", damped)
            # store the test-set MAPE and the Ljung-Box p-value (in %) of the residuals,
            # so that models with autocorrelated residuals can be excluded later
            all_fit[[model_name]][1] <- accuracy(f = forecast.ets(test_model, h = forecast_values)$mean, x = test_data)[5]
            all_fit[[model_name]][2] <- 100*(Box.test(test_model$residuals, lag = 20, type = "Ljung-Box")$p.value)
            test_models[[model_name]] <- test_model
            print(test_model$method)
            print(accuracy(f = forecast.ets(test_model, h = forecast_values)$mean, x = test_data)[5])
            print("")
        }
    }
    return(list(all_fit, test_models))
}
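
# (added) fit_function returns a list of two elements:
#   [[1]] per-model vector: c(test-set MAPE, 100 * Ljung-Box p-value of the residuals)
#   [[2]] the fitted ets objects, keyed by the same model names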
    
In [ ]:
    
# Fit the models on all four variants of the data: original, cleaned,
# seasonally adjusted, and cleaned + seasonally adjusted
models_adj <- fit_function(train_data_adj, test_data_adj)                          # seasonally adjusted data
models_adj_cleaned <- fit_function(train_data_adj_cleaned, test_data_adj_cleaned)  # seasonally adjusted, cleaned (outliers removed) data
models <- fit_function(train_data, test_data)                                      # original data
models_cleaned <- fit_function(train_data_cleaned, test_data_cleaned)              # original, cleaned data
    
In [ ]:
    
all_fit_adj <- models_adj[[1]]
test_models_adj <- models_adj[[2]]
all_fit_adj_cleaned<- models_adj_cleaned[[1]]
test_models_adj_cleaned <- models_adj_cleaned[[2]]
all_fit <- models[[1]]
test_models <- models[[2]]
all_fit_cleaned <- models_cleaned[[1]]
test_models_cleaned <- models_cleaned[[2]]
    
In [ ]:
    
#Finding the best fit
proper_models <- all_fit_adj
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_adj <- names(top_mape_val)
    
top_mape_val
seasonal_mean
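    
This ranking block is repeated below for the other data variants. A small helper along these lines (a sketch; the function name is my own and is not used elsewhere in this notebook) could avoid the duplication.
In [ ]:
    
# sketch (added): rank the models of one variant by test-set MAPE
top_n_by_mape <- function(fit_list, n = 3) {
    mapes <- sapply(fit_list, function(x) x[1])
    sort(mapes)[1:min(n, length(mapes))]
}
# e.g. top_n_by_mape(all_fit_adj) reproduces the MAPE values in top_mape_val above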
    
In [ ]:
    
#Finding the best fit
proper_models <- all_fit_adj_cleaned
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
        
#Finding top n fits
#top_models <- c()
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_adj_cleaned <- names(top_mape_val)
    
top_mape_val
seasonal_cleaned_mean
    
In [ ]:
    
#Finding the best fit
proper_models <- all_fit
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models <- names(top_mape_val)
    
top_mape_val
    
In [ ]:
    
#Finding the best fit
proper_models <- all_fit_cleaned
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
        
#Finding top n fits
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_cleaned <- names(top_mape_val)
    
top_mape_val
    
In [ ]:
    
plot(ES_series,col = "black")
lines(test_data_adj, col = "blue")
lines(forecast.ets(test_models_adj[[top_models_adj[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_adj[[top_models_adj[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_adj[[top_models_adj[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
                       c("Test data", "Best model", "Second best", "Third best"))
# Observation: unusual peak in December 2015 - check whether it is an anomaly
    
In [ ]:
    
plot(ES_series_cleaned,col = "black")
lines(test_data_adj_cleaned, col = "blue")
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
                       c("Test data", "Best model", "Second best", "Third best"))
# Observation: unusual peak in December 2015 - check whether it is an anomaly
    
In [ ]:
    
#all_fit
#test_models[[all_fit[1]]]
plot(series,col = "black")
lines(test_data, col = "blue")
top_models[1]
accuracy(test_models[[top_models[1]]])
top_models[2]
accuracy(test_models[[top_models[2]]])
top_models[3]
accuracy(test_models[[top_models[3]]])
lines(forecast.ets(test_models[[top_models[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models[[top_models[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models[[top_models[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
                       c("Test data", "Best model", "Second best", "Third best"))
# Observation: unusual peak in December 2015 - check whether it is an anomaly
    
In [ ]:
    
#plot(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12))
accuracy(test_models_cleaned[[top_models_cleaned[1]]])
accuracy(test_models_cleaned[[top_models_cleaned[2]]])
accuracy(test_models_cleaned[[top_models_cleaned[3]]])
plot(series.cleaned,col = "black")
lines(test_data_cleaned, col = "blue")
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
                       c("Test data(cleaned)", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
    
In [ ]:
    
print("Case 1: Seasonally adjusted data")
# Remove the bias value that was added earlier to keep the series positive
ES_series_bias <- ES_series - bias_value
test_series_bias <- test_data_adj - bias_value
forecast1_bias <- forecast.ets(test_models_adj[[top_models_adj[1]]],h=12)$mean - bias_value
forecast2_bias <- forecast.ets(test_models_adj[[top_models_adj[2]]],h=12)$mean - bias_value
forecast3_bias <- forecast.ets(test_models_adj[[top_models_adj[3]]],h=12)$mean - bias_value
#Adding back the seasonal value from stl decomposition
ES_value_adj <- ES_series_bias + seasonal
test_series_adj <- test_series_bias + seasonal
#Adding back the mean seasonal component to the forecasted data
forecast1_adj <- forecast1_bias + seasonal_mean
forecast2_adj <- forecast2_bias + seasonal_mean
forecast3_adj <- forecast3_bias + seasonal_mean
#Calculating the accuracy of the training data
accuracy(test_models_adj[[top_models_adj[1]]])
accuracy(test_models_adj[[top_models_adj[2]]])
accuracy(test_models_adj[[top_models_adj[3]]])
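
# consistency check (added): removing the bias and adding back the full STL seasonal
# component should recover the original series (the decomposition is additive)
all.equal(as.numeric(ES_value_adj), as.numeric(series))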
    
In [ ]:
    
# Check the MAPE against the original-scale (re-seasonalised) test data
print(paste0("Top model: ", top_models_adj[1]))
accuracy(forecast1_adj,test_series_adj)
print(paste0("Second model: ", top_models_adj[2]))
accuracy(forecast2_adj,test_series_adj)
print(paste0("Third model: ", top_models_adj[3]))
accuracy(forecast3_adj,test_series_adj)
#accuracy(test_data, forecast.ets(test_models[[top_models[3]]],h=12)$mean )
    
In [ ]:
    
print("Case 2: Seasonally adjusted & cleaned data")
# Remove the bias value that was added earlier to keep the series positive
ES_series_bias_cleaned <- ES_series_cleaned - bias_value_cleaned
test_series_bias_cleaned <- test_data_adj_cleaned - bias_value_cleaned
forecast1_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[1]]],h=12)$mean - bias_value_cleaned
forecast2_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[2]]],h=12)$mean - bias_value_cleaned
forecast3_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[3]]],h=12)$mean - bias_value_cleaned
#Adding back the seasonal value from stl decomposition
ES_value_adj_cleaned <- ES_series_bias_cleaned + seasonal_cleaned
test_series_adj_cleaned <- test_series_bias_cleaned + seasonal_cleaned
#Adding back the mean seasonal component to the forecasted data
forecast1_adj_cleaned <- forecast1_bias + seasonal_cleaned_mean
forecast2_adj_cleaned <- forecast2_bias + seasonal_cleaned_mean
forecast3_adj_cleaned <- forecast3_bias + seasonal_cleaned_mean
#Calculating the accuracy of the training data
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[1]]])
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[2]]])
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[3]]])
    
In [ ]:
    
# Check the MAPE against the original-scale (re-seasonalised) cleaned test data
print(paste0("Top model: ", top_models_adj_cleaned[1]))
accuracy(forecast1_adj_cleaned,test_series_adj_cleaned)
print(paste0("Second model: ", top_models_adj_cleaned[2]))
accuracy(forecast2_adj_cleaned,test_series_adj_cleaned)
print(paste0("Third model: ", top_models_adj_cleaned[3]))
accuracy(forecast3_adj_cleaned,test_series_adj_cleaned)
top_models
#accuracy(forecast.ets(test_models[[top_models[1]]],h=12)$mean, test_data)
#accuracy(test_data, forecast.ets(test_models[[top_models[3]]],h=12)$mean)
    
In [ ]:
    
# Ljung-Box test: check whether a model's residuals behave like white noise
# (no significant autocorrelation), and plot their ACF
residual_analysis <- function(model_list, model_name){
    print(model_name)
    print(Box.test(model_list[[model_name]]$residuals, lag = 20, type = "Ljung-Box"))
    Acf(model_list[[model_name]]$residuals, main = model_name)
}
    
In [ ]:
    
# Case 1: seasonally adjusted models
# Residual analysis for the top three models
residual_analysis(test_models_adj, top_models_adj[1]) # Top model
residual_analysis(test_models_adj, top_models_adj[2]) # Second best model
residual_analysis(test_models_adj, top_models_adj[3]) # Third best model
    
In [ ]:
    
# Case 2: seasonally adjusted, cleaned models
# Residual analysis for the top three models
residual_analysis(test_models_adj_cleaned, top_models_adj_cleaned[1]) # Top model
residual_analysis(test_models_adj_cleaned, top_models_adj_cleaned[2]) # Second best model
residual_analysis(test_models_adj_cleaned, top_models_adj_cleaned[3]) # Third best model
    
In [ ]:
    
# Case 3: models on the original data
# Residual analysis for the top three models
residual_analysis(test_models, top_models[1]) # Top model
residual_analysis(test_models, top_models[2]) # Second best model
residual_analysis(test_models, top_models[3]) # Third best model
    
In [ ]:
    
# Case 4: models on the cleaned data
# Residual analysis for the top three models
residual_analysis(test_models_cleaned, top_models_cleaned[1]) # Top model
residual_analysis(test_models_cleaned, top_models_cleaned[2]) # Second best model
residual_analysis(test_models_cleaned, top_models_cleaned[3]) # Third best model
    
In [ ]:
    
plot(ES_value_adj,col = "black", ylab = "No of complaints", 
                 main = "Model with seasonal adjustment")
lines(test_series_adj, col = "blue") #Original test data
accuracy(forecast1_adj,test_series_adj)
accuracy(forecast2_adj,test_series_adj)
accuracy(forecast3_adj,test_series_adj)
lines(test_series_bias + seasonal_mean, col = "brown", lty = 2) # Deseasonalised test data with the average seasonal component added back
lines(forecast1_adj, col = "red") #Top model
lines(forecast2_adj, col = "green") #Top second model
lines(forecast3_adj, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
                       c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
    
In [ ]:
    
plot(ES_value_adj_cleaned,col = "black", ylab = "No of complaints",
                 main = "Model with seasonal adjustment and cleaning") 
lines(test_series_adj_cleaned, col = "blue") #Original test data
accuracy(forecast1_adj_cleaned,test_series_adj_cleaned)
accuracy(forecast2_adj_cleaned,test_series_adj_cleaned)
accuracy(forecast3_adj_cleaned,test_series_adj_cleaned)
lines(test_series_bias_cleaned + seasonal_cleaned_mean, col = "brown", lty = 2) # Deseasonalised test data with the average seasonal component added back
lines(forecast1_adj_cleaned, col = "red") #Top model
lines(forecast2_adj_cleaned, col = "green") #Top second model
lines(forecast3_adj_cleaned, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
                       c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
    
In [ ]:
    
plot(series,col = "black", ylab = "No of complaints",
                 main = "Model with original data") 
lines(test_data, col = "blue") #Originayl test data
accuracy(forecast.ets(test_models[[top_models[1]]],h=12)$mean,test_data)
accuracy(forecast.ets(test_models[[top_models[2]]],h=12)$mean,test_data)
accuracy(forecast.ets(test_models[[top_models[3]]],h=12)$mean,test_data)
lines(forecast.ets(test_models[[top_models[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models[[top_models[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models[[top_models[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
                       c("Test data", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
                       c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
    
In [ ]:
    
plot(series.cleaned,col = "black", main = "Model with cleaned data")
lines(test_data_cleaned, col = "blue")
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean,test_data_cleaned)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean,test_data_cleaned)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean,test_data_cleaned)
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
                       c("Test data(cleaned)", "Best model", "Second best", "Third best","Actual test data"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
    
In [ ]: